#!/bin/bash
#############################################################################
#   This script scans all Program|Series|Category_xxxx_yyyymmddhhMMss.xml files
# and only keeps the latest ones according to the timestamp in the file name.
#############################################################################

# Usage: <this script> <output_dir>
#
# Files in <output_dir> named <type>_<id>_<timestamp>.xml are grouped by their
# "<type>_<id>" prefix (everything before the last underscore). Within each
# group only the file that sorts last, i.e. the one with the greatest
# timestamp, is kept; all earlier ones are deleted.

#######################################
# Delete every *.xml file superseded by a newer file of the same group.
# Arguments: $1 - directory holding the XML files
# Outputs:   rm's verbose lines and a summary on stdout; errors on stderr
# Returns:   0 on success, 1 if the directory cannot be entered,
#            2 if no directory was given
#######################################
main() {
  local output_dir=${1:-}

  if [[ -z "$output_dir" ]]; then
    printf 'usage: %s <output_dir>\n' "${0##*/}" >&2
    return 2
  fi

  # Never carry on after a failed cd: the rm below would otherwise operate on
  # whatever directory the script happened to be started from.
  cd -- "$output_dir" || {
    printf '[error] cannot enter directory: %s\n' "$output_dir" >&2
    return 1
  }

  # Byte-wise collation keeps all files sharing a "<type>_<id>_" prefix
  # adjacent and orders the fixed-width timestamps chronologically.
  local LC_ALL=C
  # An empty directory must yield an empty list, not the literal '*.xml'.
  shopt -s nullglob
  local -a files=(*.xml)

  echo '[info] begin to check all duplicated files...'

  local deleted=0
  local prev='' prev_key=''
  local name key
  for name in "${files[@]}"; do
    # Group key: file name without the trailing "_<timestamp>.xml".
    key=${name%_*}
    if [[ -n "$prev" && "$key" == "$prev_key" ]]; then
      # Same group as the previous file, so the previous one is older.
      if rm -fv -- "$prev"; then
        deleted=$((deleted + 1))
      fi
    fi
    prev=$name
    prev_key=$key
  done

  echo '*****************************************'
  echo '[info] job is done!'
  echo "[info] original file number: ${#files[@]}"
  echo "[info] deleted file number: $deleted"
  echo '*****************************************'
}

# main's return value is the last status of the script, hence its exit code.
main "$@"

